# ----
# TITLE: Replication R code for the research paper entitled: Sports mega-events and economic growth: A synthetic control approach
# Version: 1.0
# Data: 26.12.2021
#
# Authors: Michal Kobierecki, Michal Pierzgalski
# ----


# START
# devtools::install_github('xuyiqing/fastplm')
# devtools::install_github('xuyiqing/fect')

devtools::install_github('xuyiqing/gsynth')


# load packages
library(tidyverse)
library(tidymodels)
library(panelView)
library(gsynth)
library(ggplot2)
library(fect)

# Options
options(scipen = 999)
options(digits = 4)

# ------------------------------------------------------------------------------


# BEGIN WITH loading RData file (data set): data_rep_mega.RData !!!

# START the analysis -----------------------------------------------------------

# data_long <- mutate(data_long, expimp = export / import, lpop = log(pop))



# TREATMENT Setup

# a)

data_long <-
  mutate(data_long, TreatBRA = ifelse((year >= 2008 &
                                         cntry_code == "BRA"), 1, 0))
data_long <-
  mutate(data_long, TreatGBR = ifelse((year >= 2006 &
                                         cntry_code == "GBR"), 1, 0))
data_long <-
  mutate(data_long, TreatCAN = ifelse((year >= 2004 &
                                         cntry_code == "CAN"), 1, 0))
data_long <-
  mutate(data_long, TreatZAF = ifelse((year >= 2005 &
                                         cntry_code == "ZAF"), 1, 0))



# TREATMENT (2)
data_long <-
  mutate(data_long, TreatBRA = ifelse((year >= 2014 &
                                         cntry_code == "BRA"), 1, 0))
data_long <-
  mutate(data_long, TreatGBR = ifelse((year >= 2012 &
                                         cntry_code == "GBR"), 1, 0))
data_long <-
  mutate(data_long, TreatCAN = ifelse((year >= 2010 &
                                         cntry_code == "CAN"), 1, 0))
data_long <-
  mutate(data_long, TreatZAF = ifelse((year >= 2010 &
                                         cntry_code == "ZAF"), 1, 0))


# TREATMENT (3 - the construction phase)
data_long <-
  mutate(data_long, TreatBRA = ifelse((year >= 2011 &
                                         cntry_code == "BRA"), 1, 0))
data_long <-
  mutate(data_long, TreatGBR = ifelse((year >= 2009 &
                                         cntry_code == "GBR"), 1, 0))
data_long <-
  mutate(data_long, TreatCAN = ifelse((year >= 2007 &
                                         cntry_code == "CAN"), 1, 0))
data_long <-
  mutate(data_long, TreatZAF = ifelse((year >= 2007 &
                                         cntry_code == "ZAF"), 1, 0))

# (...)

data_long_2 <- data_long %>% mutate(Treat = ifelse())


# Reverse causality

ggplot(data_long, aes(x=gdppcgr, y=lifeexp)) + geom_point() + theme_bw()


# ANALYSIS for UK (GBR) --------------------------------------------------------

data_long_uk <-
  filter(
    data_long,
    cntry_code %in% c(
      "GBR",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )


data_long_uk <-
  filter(
    data_long,
    cntry_code %in% c(
      "GBR",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI"
    )
  )


data_long_uk <- data.frame(data_long_uk)

panelView(
  gdppcgr ~ TreatGBR,
  data = filter(data_long_uk,!(cntry_code %in% c()) &
                  (year <= 2019 & year >= 1993)),
  # 2012 (London Olympics)
  #  data = filter( data_long_uk, ( cntry_code %in% c( "NZL", "AUS", "GBR" ) ) & ( year <= 2019 & year >= 1993 ) ), # 2012 (London Olympics)
  index = c("cntry", "year"),
  xlab = "Year",
  ylab = "Outcome",
  axis.adjust = T,
  theme.bw = TRUE,
  pre.post = TRUE,
  type = "outcome",
  by.group = F,
  cex.main = 11,
  cex.main.sub = 11,
  #  main = "",
  #  id = c(""),
  #  ylim = c(0, 50),
  legendOff = T
)

# ----


# With controls
system.time(
  out_uk_gdp <-
    gsynth(
      gdppcgr ~ TreatGBR + inflation + industry + export + lifeexp,
      Y = gdppcgr,
      D = TreatGBR,
      data = filter(data_long_uk, year <= 2019 &
                      year >= 1993),
      # 2012 (London Olympics)
      na.rm = T,
      EM = T,
      parallel = TRUE,
      index = c("cntry_code", "year"),
      se = T,
      inference = "parametric",
      k = 10,
      min.T0 = 5,
      r = c(0, 4),
      CV = T,
      force = "two-way",
      seed = 2000,
      estimator = "ife",
      nboots = 2000,
      criterion = "mspe"
    )
)

# ----


# Print results

print(out_uk_gdp)
out_uk_gdp$wgt.implied
out_uk_gdp$MSPE
out_uk_gdp$sigma2
out_uk_gdp$factor

out_uk_gdp$id.co

# print(out_uk_unemp)
# out_uk_unemp$wgt.implied

plot(out_uk_gdp, type = "factors", theme.bw = T, main = "GBR")
plot(out_uk_gdp, type = "loadings", theme.bw = T)


# Plot results (dim: 596x387 px)

plot(out_uk_gdp,
     theme.bw = T,
     xlab = "Year",
     main = "GBR")

# plot(out_uk_unemp, theme.bw = T, xlab = "Year", main = "UK")


plot(
  out_uk_gdp,
  type = "counterfactual",
  raw = "band",
  shade.post = F,
  theme.bw = T,
  axis.adjust = T,
  main = "GBR"
)

# ----



# ANALYSIS for BRA -------------------------------------------------------------


data_long_bra <-
  filter(
    data_long,
    cntry_code %in% c(
      "BRA",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )

data_long_bra <-
  filter(
    data_long,
    cntry_code %in% c(
      "BRA",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )



data_long_bra <- data.frame(data_long_bra)

panelView(
  gdppcgr ~ TreatBRA,
  data = filter(data_long_bra,!(cntry_code %in% c()) &
                  (year <= 2019 & year >= 1993)),
  # 2016 Olympics in Rio
  index = c("cntry_code", "year"),
  xlab = "Year",
  ylab = "ID",
  theme.bw = TRUE,
  pre.post = TRUE,
#  type = "outcome"
)


# With controls

system.time(
  out_bra_gdp <-
    gsynth(
      gdppcgr ~ TreatBRA + inflation + industry + export + lifeexp,
      Y = gdppcgr,
      D = TreatBRA,
      data = filter(data_long_bra, (year <= 2019 &
                                      year >= 1993)),
      # 2014 World Cup,
      na.rm = T,
      EM = T,
      parallel = F,
      index = c("cntry_code", "year"),
      se = T,
      inference = "parametric",
      k = 10,
      min.T0 = 5,
      r = c(0, 4),
      CV = T,
      force = "two-way",
      seed = 2000,
      estimator = "ife",
      nboots = 2000,
      criterion = "mspe"
    )
)


# ----


# Print results

print(out_bra_gdp)
out_bra_gdp$wgt.implied
out_bra_gdp$MSPE
out_bra_gdp$sigma2
out_bra_gdp$factor

out_bra_gdp$id.co


out_bra_gdp$validX

# Plot results (dim: 596x387 px)

plot(out_bra_gdp,
     theme.bw = T,
     xlab = "Year",
     main = "BRA")
# plot(out_bra_unemp, theme.bw = T, xlab = "Year", main = "BRA")

plot(out_bra_gdp, type = "factors", main = "BRA", theme.bw = T)


plot(
  out_bra_gdp,
  type = "counterfactual",
  raw = "band",
  shade.post = F,
  theme.bw = T,
  axis.adjust = T,
  main = "BRA"
)


# ----



# ANALYSIS for ZAF -------------------------------------------------------------

data_long_zaf <-
  filter(
    data_long,
    cntry_code %in% c(
      "ZAF",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )

data_long_zaf <-
  filter(
    data_long,
    cntry_code %in% c(
      "ZAF",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )

data_long_zaf <- data.frame(data_long_zaf)

panelView(
  gdppcgr ~ TreatZAF,
  data = data_long_zaf,
  index = c("cntry_code", "year"),
  xlab = "Year",
  ylab = "ID",
  theme.bw = TRUE,
  pre.post = TRUE,
#  type = "outcome"
)


# With controls
system.time(
  out_zaf_gdp <-
    gsynth(
      gdppcgr ~ TreatZAF + inflation + industry + export + lifeexp,
      D = TreatZAF,
      data = filter(data_long_zaf, (year <= 2019 &
                                      year >= 1993)),
      # 2010 (ZAF World Cup)
      na.rm = T,
      EM = T,
      parallel = TRUE,
      index = c("cntry_code", "year"),
      se = T,
      inference = "parametric",
      k = 10,
      min.T0 = 5,
      r = c(1, 4),
      CV = T,
      force = "two-way",
      seed = 2000,
      estimator = "ife",
      nboots = 2000
    )
)



# Print results

print(out_zaf_gdp)
out_zaf_gdp$wgt.implied
out_zaf_gdp$MSPE
out_zaf_gdp$sigma2
out_zaf_gdp$factor

# Plot results

plot(out_zaf_gdp,
     theme.bw = T,
     xlab = "Year",
     main = "ZAF")


plot(
  out_zaf_gdp,
  type = "counterfactual",
  raw = "band",
  shade.post = F,
  theme.bw = T,
  axis.adjust = T,
  main = "ZAF"
)

plot(out_zaf_gdp, type = "factors", main = "ZAF", theme.bw = T)

# ----



# ANALYSIS for CAN -------------------------------------------------------------

data_long_can <-
  filter(
    data_long,
    cntry_code %in% c(
      "CAN",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )

data_long_can <-
  filter(
    data_long,
    cntry_code %in% c(
      "CAN",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI"
    )
  )



data_long_can <- data.frame(data_long_can)

panelView(
  gdppcgr ~ TreatCAN,
  data = filter(data_long_can,!(cntry_code %in% c()) &
                  (year <= 2019 & year >= 1993)),
  # 2010 Vancouver Olympics
  index = c("cntry_code", "year"),
  xlab = "Year",
  ylab = "ID",
  theme.bw = TRUE,
  pre.post = TRUE,
  #  type = "outcome"
)


# With controls
system.time(
  out_can_gdp <-
    gsynth(
      gdppcgr ~ TreatCAN + inflation + industry + export + lifeexp,
      Y = gdppcgr,
      D = TreatCAN,
      data = filter(data_long_can,!(cntry_code %in% c()) &
                      (year <= 2019 & year >= 1993)),
      # 2010 Vancouver Olympics
      na.rm = T,
      EM = T,
      parallel = TRUE,
      index = c("cntry_code", "year"),
      se = T,
      inference = "parametric",
      k = 10,
      min.T0 = 5,
      r = c(0, 4),
      CV = T,
      force = "two-way",
      seed = 2000,
      estimator = "ife",
      nboots = 2000,
      criterion = "mspe"
    )
)

# ----


# Print results

print(out_can_gdp)
out_can_gdp$wgt.implied
out_can_gdp$MSPE
out_can_gdp$sigma2
out_can_gdp$factor


# Plot results

plot(out_can_gdp,
     theme.bw = T,
     xlab = "Year",
     main = "CAN")

plot(out_can_gdp, type = "factors", theme.bw = T, main = "CAN")


plot(
  out_can_gdp,
  type = "counterfactual",
  raw = "band",
  shade.post = F,
  theme.bw = T,
  axis.adjust = T,
  main = "CAN"
)

# ----



# Print summary results r ------------------------------------------------------

print(out_bra_gdp)
print(out_bra_unemp)
print(out_bra_biz)

print(out_can_gdp)
print(out_can_unemp)
print(out_can_biz)

print(out_uk_gdp)
print(out_uk_unemp)
print(out_uk_biz)

print(out_zaf_gdp)
print(out_zaf_biz)


# END --------------------------------------------------------------------------


#####
## ##
#####



## Joint analysis (optional) ---------------------------------------------------

# TREATMENT


# FOR ALL ----------------------------------------------------------------------

data_long.all <-
  mutate(data_long, Treat = ifelse((year >= 2008 &
                                      cntry_code == "BRA") |
                                     (year >= 2006 &
                                        cntry_code == "GBR") |
                                     (year >= 2004 &
                                        cntry_code == "CAN") |
                                     (year >= 2005 &
                                        cntry_code == "ZAF"),
                                   1,
                                   0
  ))

data_comp <- data.frame(data_long.all)

data_comp <-
  filter(
    data_comp,
    cntry_code %in% c(
      "GBR",
      "CAN",
      "BRA",
      "ZAF",
      "CZE",
      "LUX",
      "SVN",
      "ISR",
      "DNK",
      "FIN",
      "SWE",
      "ISL",
      "NZL",
      "EST",
      "LTU",
      "LVA",
      "SVK",
      "HUN",
      "CHL",
      "TUR",
      "CRI",
      "MEX",
      "COL",
      "IND"
#      "IDN"
    )
  )


# Analysis ---------------------------------------------------------------------

# ALL --------------------------------------------------------------------------

panelView(
  lifeexp ~ Treat,
  data = filter(data_comp,
                  (year <= 2019 & year >= 1993)),
  index = c("cntry_code", "year"),
  xlab = "Year",
  ylab = "GDP per capita growth rate",
  theme.bw = TRUE,
  axis.adjust = T,
  pre.post = TRUE,
 type = "outcome",
  by.group = F,
  cex.main = 11,
  cex.main.sub = 11,
  legendOff = F,
#  color = c("#bbbbbb", "#888888", "black"),
  background = "#ffffff"
)

system.time(
  out_all_gdppcgr <-
    gsynth(
      gdppcgr ~ Treat + inflation + industry + export + lifeexp,
      data = filter(data_comp, year <= 2019 & year >= 1993),
      na.rm = T,
      EM = T,
      parallel = TRUE,
      index = c("cntry_code", "year"),
      se = T,
      inference = "parametric",
      min.T0 = 5,
      r = c(0, 4),
      CV = T,
      force = "two-way",
      seed = 2000,
      estimator = "ife",
      nboots = 2000,
      criterion = "mspe"
    )
)


print(out_all_gdppcgr)

plot(out_all_gdppcgr, theme.bw = T, main = "Gap plot or the estimated dynamic treatment effects with 95% confidence intervals; The 95 percent confidence intervals for the ATT are based on bootstraps")

plot(
  out_all_gdppcgr,
  type = "counterfactual",
  raw = "band",
  shade.post = F,
  theme.bw = T,
  axis.adjust = F,
  main = "Estimated Y(0) Average for the Treated vs Treated average"
  #  id = "CAN"
)

plot(out_all_gdppcgr, type = "factors", theme.bw = T)




# ------------------------------------------------------------------------------

# Plot Total general government deficit

ggplot(data = filter(tot_gov_def, (
  LOCATION %in% c(
    "GBR",
    "CAN",
    "BRA",
    "ZAF",
    "CZE",
    "LUX",
    "SVN",
    "ISR",
    "DNK",
    "FIN",
    "SWE",
    "ISL",
    "NZL",
    "EST",
    "LTU",
    "LVA",
    "SVK",
    "HUN",
    "CHL",
    "TUR",
    "CRI",
    "MEX",
    "COL",
    "IND"
  )
) & (TIME >= 1993 & TIME <= 2019))) +
  geom_line(aes(x = TIME, y = Value, color = LOCATION)) + theme_bw() + theme(legend.position = "bottom") + labs(colour = "Country", x = "Year", y = "General government deficit")

# ----


# Box-plots for covariates

data_comp.2 <- data_comp %>% mutate(D = ifelse(cntry_code %in% c("BRA", "CAN", "GBR", "ZAF"), "Treated", "Control"))

ggplot(data = data_comp.2) +
  geom_boxplot( aes(x = factor(year), y = lifeexp, colour = D )) + theme_bw() + theme(legend.position = "bottom") + labs(colour = "Group", x = "Year", y = "Life expectancy") + theme(axis.text.x = element_text(angle=45))



# --- Desc stat

library(psych)

desc.lifeexp <- describeBy(gdppcgr ~ year, mat = T,
                             digits = 3,
                             data = data.frame(data_long))

desc.lifeexp[c(2:8, 11:12)]

desc.gdppcgr <- describeBy(gdppcgr ~ year, mat = T,
                           digits = 4,
                           data = data.frame(data_long))

desc.gdppcgr[c(2:8, 11:12)]

# ------------------------------------------------------------------------------


# --- Lifeexp vs GDP growth rate

ggplot(data = filter(data_long, (
  cntry_code %in% c(
    "GBR",
    "CAN",
    "BRA",
    "ZAF",
    "CZE",
    "LUX",
    "SVN",
    "ISR",
    "DNK",
    "FIN",
    "SWE",
    "ISL",
    "NZL",
    "EST",
    "LTU",
    "LVA",
    "SVK",
    "HUN",
    "CHL",
    "TUR",
    "CRI",
    "MEX",
    "COL",
    "IND"
  )
) & (year >= 1993 & year <= 2019))) +
  geom_point( aes(y = lifeexp, x = gdppcgr, color = cntry_code )) + facet_wrap(~ year) + theme_bw() + theme(legend.position = "bottom") + labs(y = "Life expectancy at birth", x = "GDP per capita growth rate", color = "Country") + theme(axis.text.x = element_text(angle=45))


ggplot(data = filter(data_long, (
  cntry_code %in% c(
    "GBR",
    "CAN",
    "BRA",
    "ZAF",
    "CZE",
    "LUX",
    "SVN",
    "ISR",
    "DNK",
    "FIN",
    "SWE",
    "ISL",
    "NZL",
    "EST",
    "LTU",
    "LVA",
    "SVK",
    "HUN",
    "CHL",
    "TUR",
    "CRI",
    "MEX",
    "COL",
    "IND"
  )
) & (year >= 1993 & year <= 2019))) +
  geom_point( aes(y = lifeexp, x = gdppcgr, color = cntry_code )) + theme_bw() + theme(legend.position = "bottom") + labs(y = "Life expectancy at birth", x = "GDP per capita growth rate", color = "Country") + theme(axis.text.x = element_text(angle=45))

# ----

# Saving on object in RData format
# save(data_long, file = "data.ME.26.12.21.RData")

